
*** This file does the entire reduced-form analysis, including creating the reduced-form tables.
*** Make sure to have the following files in the Working Directory prior to running
* (i) Indiv_AnalysisSample_imp_Jun2021.dta
* (ii) Linkwise_Analysis_Sample_Jun2021.dta
* (iii) IDs_and_levels.dta


** TABLE 1 -- VARIABLE DESCRIPTIVES **
* One row per variable in mat_BL_desc: column 1 holds the mean and column 2 the
* standard deviation returned by summarize. The matrix is exported to Table1.tex.
use "Indiv_AnalysisSample_imp_Jun2021", clear

local desc_vars E std7 std8 SC ST OBC t0_index_educ t0_index_gender t1_index_educ t1_index_gender
local n_desc : word count `desc_vars'

matrix mat_BL_desc = J(`n_desc', 2, .)
local row = 0
foreach v of local desc_vars {
	local ++row
	summarize `v'
	matrix mat_BL_desc[`row', 1] = r(mean)
	matrix mat_BL_desc[`row', 2] = r(sd)
}
matrix list mat_BL_desc
esttab mat(mat_BL_desc, fmt(%9.3f)) using Table1.tex, replace



	

** TABLE 2 -- NETWORK VARIABLE DESCRIPTIVES
* One row per endline link item in mat_links_desc_EL:
*   col 1 = mean of the t1_out_ version of the item
*   col 2 = correlation between the t1_out_ and t1_in_ versions of the item
*   col 3 = the item's entry in the first column of e(L) from pca
*           (filled for the PCA items only; left missing for the static items)

use "Linkwise_Analysis_Sample_Jun2021", clear

local t1_pca_list q11_frnd q13_speak q14_spemt_out_sch q15_clever q16_lot_friends q16_shy q17_confident q18_wish_like q19_trust
local t1_static_list q9_relatives q10_caste q12_home
local n_pca : word count `t1_pca_list'

* Prefix every PCA item with t1_out_ to obtain the varlist passed to pca.
local t1_pca_RH
foreach item of local t1_pca_list {
	local t1_pca_RH `t1_pca_RH' t1_out_`item'
}

matrix mat_links_desc_EL = J(12, 3, .)
pca `t1_pca_RH', components(1)
matrix t1_firstpc = e(L)

* PCA items fill the first rows in list order; the static items follow.
local row = 0
foreach item in `t1_pca_list' `t1_static_list' {
	local ++row
	summarize t1_out_`item'
	matrix mat_links_desc_EL[`row', 1] = r(mean)
	correlate t1_out_`item' t1_in_`item'
	matrix c = r(C)
	matrix mat_links_desc_EL[`row', 2] = c[2,1]
	* Only the PCA items have a row in t1_firstpc.
	if `row' <= `n_pca' {
		matrix mat_links_desc_EL[`row', 3] = t1_firstpc[`row', 1]
	}
}
matrix list mat_links_desc_EL
esttab mat(mat_links_desc_EL, fmt(%9.3f)) using Table2.tex, replace






** TABLE 3 -- NETWORK SIZE AND COMPLEMENTARITY
* Columns written to Table3.tex, in order:
*   (1)-(2) individual-level link counts (connect_OUT, frnd_OUT at t0) on school size
*   (3)-(4) link-level t0 out-link indicators (frnd, connect) on school size
*   (5)-(6) link-level t0 out-link on the matching t0 in-link (frnd, connect)
* All regressions cluster standard errors by schl_code.

* School size = number of rows per schl_code in the individual-level file.
use "Indiv_AnalysisSample_imp_Jun2021", clear
gen schl_size = 1
collapse (sum) schl_size, by(schl_code)
tempfile S_Size
save `S_Size', replace


* Sum the link variables within (school, self) to get per-student counts,
* then tag the summed variables used below with the suffix _ct.
use "Linkwise_Analysis_Sample_Jun2021", clear
collapse (sum) t0_connect* t1_connect* t0_frnd* t1_frnd*, by(schl_code enroll_num_self)
foreach def in frnd_OUT frnd_IN frnd_AND frnd_OR connect_OUT connect_IN connect_SUM {
	foreach t in t0 t1 {
		rename `t'_`def' `t'_`def'_ct
	}
}
rename enroll_num_self enroll_num
merge m:1 schl_code using `S_Size'
drop _m

keep schl_code enroll_num *ct schl_size
tempfile a3
save `a3', replace


* The first outreg2 call of this table uses replace so that output left over
* from an earlier run is discarded; every later call appends a column.
* (Previously all calls used append, so re-running stacked duplicate columns.)
local t3_mode replace

use `a3', clear
foreach def in t0_connect_OUT t0_frnd_OUT {
	reg `def'_ct schl_size, cluster(schl_code)
	outreg2 using Table3.tex, stats(coef se) `t3_mode' bdec(3) sdec(3)
	local t3_mode append
}


* Link-level versions of the school-size regressions.
use "Linkwise_Analysis_Sample_Jun2021", clear
merge m:1 schl_code using `S_Size'
drop _m
foreach def in frnd connect {
	reg t0_`def'_OUT schl_size, cluster(schl_code)
	outreg2 using Table3.tex, stats(coef se) append bdec(3) sdec(3)
}


* Out-link on in-link. The regressor is copied into Link_IN so both link
* definitions use the same regressor name in the exported table.
foreach def in frnd connect {
	gen Link_IN = t0_`def'_IN
	reg t0_`def'_OUT Link_IN, cluster(schl_code)
	outreg2 using Table3.tex, stats(coef se) append bdec(3) sdec(3)
	drop Link_IN
}


** TABLE 4 -- RF TEs on Outcomes
* For each outcome index (educ, gender): regress the endline index on P and NP,
* first without and then with the baseline index as a control. The sample is
* T1==0 with the endline missing-flag equal to 0; SEs are clustered by schl_code.
use "Indiv_AnalysisSample_imp_Jun2021", clear

* NP equals 1 when T2==1 and P==0, and 0 in every other case.
gen NP = 1 if T2==1 & P==0
replace NP = 0 if NP==.

* The first outreg2 call of this table uses replace so that output left over
* from an earlier run is discarded; every later call appends a column.
* (Previously all calls used append, so re-running stacked duplicate columns.)
local t4_mode replace

foreach ind in educ gender {
	* Copy the baseline index into t0_var so the control has the same name for both outcomes.
	gen t0_var = t0_index_`ind'
	reg t1_index_`ind' P NP if T1==0 & t1_index_`ind'_mis==0, cluster(schl_code)
	outreg2 using Table4.tex, stats(coef se) `t4_mode' bdec(3) sdec(3)
	local t4_mode append
	* Pause 300 ms between writes (presumably to let the output file be released - confirm).
	sleep 300
	
	reg t1_index_`ind' P NP t0_var if T1==0 & t1_index_`ind'_mis==0, cluster(schl_code)
	outreg2 using Table4.tex, stats(coef se) append bdec(3) sdec(3)
	sleep 300	

	drop t0_var
}
	
	
** TABLE 5 -- RF TEs on Networks
* Link-level regressions of the endline out-link on own treatment (P_self),
* alter treatment (P_alter) and their product (P_AND), within T2==1, with SEs
* clustered by schl_code. Each link definition gets two columns: without and
* with the baseline out-link and in-link as controls. The reported p-value tests
* P_self + P_alter + P_AND = 0.

* Build two copies of the individual-level file, one with every listed variable
* suffixed _alter and one suffixed _self, so they can be merged onto each side
* of a link. Caste starts as SC, then is set to 2 for ST, 3 for OBC and 4 when
* SC, ST and OBC are all 0.
foreach role in alter self {
	use "IDs_and_levels", clear
	gen Gen = 1 if SC==0 & ST==0 & OBC==0
	gen Caste = SC
	replace Caste = 2 if ST==1
	replace Caste = 3 if OBC==1
	replace Caste = 4 if Gen==1
	foreach j of varlist enroll_num E std7 std8 SC ST OBC P t0_index* t1_index* source_* Caste {
		rename `j' `j'_`role'
	}
	tempfile Levels2_`role'
	save `Levels2_`role'', replace
}


use "Linkwise_Analysis_Sample_Jun2021", clear
merge m:1 schl_code enroll_num_self using `Levels2_self'
drop _m
merge m:1 schl_code enroll_num_alter using `Levels2_alter'
drop _m


gen P_AND = P_self*P_alter

* The first outreg2 call of this table uses replace so that output left over
* from an earlier run is discarded; every later call appends a column.
* (Previously all calls used append, so re-running stacked duplicate columns.)
local t5_mode replace

foreach def in frnd connect {
	gen t1_netvar = t1_`def'_OUT
	gen t0_netvar_OUT = t0_`def'_OUT
	gen t0_netvar_IN = t0_`def'_IN
	* The baseline interaction measure is the AND variable for frnd and the SUM variable for connect.
	* It only enters the correlation table below, not the regressions.
	if "`def'" == "frnd" {
		gen t0_netvar_int = t0_`def'_AND
	}
	else {
		gen t0_netvar_int = t0_`def'_SUM
	}
	
	corr *netvar*
	
	*reg t1_netvar P_self P_alter P_AND if T2==1 & t1_out_NETMISS==0, cluster(schl_code)
	reg t1_netvar P_self P_alter P_AND if T2==1, cluster(schl_code)
	test (P_self+P_alter+P_AND=0)
	local pval = r(p)
	outreg2 using Table5.tex, stats(coef se) addstat(P-value of Test, `pval') `t5_mode' bdec(3) sdec(3) adec(3)
	local t5_mode append

	*reg t1_netvar P_self P_alter P_AND t0_netvar_OUT t0_netvar_IN if T2==1 & t1_out_NETMISS==0, cluster(schl_code)
	reg t1_netvar P_self P_alter P_AND t0_netvar_OUT t0_netvar_IN if T2==1, cluster(schl_code)
	test (P_self+P_alter+P_AND=0)
	local pval = r(p)
	outreg2 using Table5.tex, stats(coef se) addstat(P-value of Test, `pval') append bdec(3) sdec(3) adec(3)
	
	* Remove the generic copies so the next link definition can recreate them.
	drop *netvar*
}

	
	



** APPENDIX TABLE A5 -- BASELINE BALANCE
* mat_BL_bal holds two rows per variable: the estimate and, directly below it,
* its standard error.
*   cols 1-3 : means for T1==1, for T2==1, and for T1==0 & T2==0
*   col  4   : p-value of the joint test on T1 and T2 (estimate row only)
*   cols 5-6 : within T2==1, means for P==1 and for P==0
*   col  7   : p-value of the test on P within T2==1 (estimate row only)
* All estimates cluster by schl_code.

use "Indiv_AnalysisSample_imp_Jun2021", clear

matrix mat_BL_bal = J(20, 7, .)
local covar E std7 std8 SC ST OBC
*local t0_binary_OR_covar = "t0_binary_OR_E t0_binary_OR_std7 t0_binary_OR_std8 t0_binary_OR_SC t0_binary_OR_ST t0_binary_OR_OBC"
*local t0_cont_SUM_covar = "t0_cont_SUM_E t0_cont_SUM_std7 t0_cont_SUM_std8 t0_cont_SUM_SC t0_cont_SUM_ST t0_cont_SUM_OBC"
local bal_vars `covar' t0_index_educ t0_index_gender t1_index_educ t1_index_gender

* Pass 1: balance across the T1 / T2 / neither groups.
local est_row = 1
foreach v of local bal_vars {
	local se_row = `est_row' + 1

	regress `v' T1 T2, cluster(schl_code)
	test T1 T2
	matrix mat_BL_bal[`est_row', 4] = r(p)

	mean `v' if T1==1, cluster(schl_code)
	matrix mat_BL_bal[`est_row', 1] = _b[`v']
	matrix mat_BL_bal[`se_row', 1] = _se[`v']

	mean `v' if T2==1, cluster(schl_code)
	matrix mat_BL_bal[`est_row', 2] = _b[`v']
	matrix mat_BL_bal[`se_row', 2] = _se[`v']

	mean `v' if T1==0 & T2==0, cluster(schl_code)
	matrix mat_BL_bal[`est_row', 3] = _b[`v']
	matrix mat_BL_bal[`se_row', 3] = _se[`v']

	local est_row = `est_row' + 2
}

* Pass 2: balance on P inside the T2==1 group.
local est_row = 1
foreach v of local bal_vars {
	local se_row = `est_row' + 1

	regress `v' P if T2==1, cluster(schl_code)
	test P
	matrix mat_BL_bal[`est_row', 7] = r(p)

	mean `v' if T2==1 & P==1, cluster(schl_code)
	matrix mat_BL_bal[`est_row', 5] = _b[`v']
	matrix mat_BL_bal[`se_row', 5] = _se[`v']

	mean `v' if T2==1 & P==0, cluster(schl_code)
	matrix mat_BL_bal[`est_row', 6] = _b[`v']
	matrix mat_BL_bal[`se_row', 6] = _se[`v']

	local est_row = `est_row' + 2
}

matrix list mat_BL_bal

esttab mat(mat_BL_bal, fmt(%9.3f)) using TableA5.tex, replace




** TABLE A6 -- KSMIRNOV TESTS
* Uses the individual-level data already in memory from the Table A5 section.
* Rows of KS_mat: t0 educ, t0 gender, t1 educ, t1 gender.
*   col 1 = r(p) from ksmirnov by T2, on observations with T1==0
*   col 2 = r(p) from ksmirnov by T1, on observations with T2==0
* Observations whose missing-flag for the index is non-zero are excluded.

matrix KS_mat = J(4, 2, .)
local row = 0
foreach wave in 0 1 {
	foreach ind in educ gender {
		local ++row
		local idx t`wave'_index_`ind'

		ksmirnov `idx' if T1==0 & `idx'_mis==0, by(T2)
		matrix KS_mat[`row', 1] = r(p)

		ksmirnov `idx' if T2==0 & `idx'_mis==0, by(T1)
		matrix KS_mat[`row', 2] = r(p)
	}
}
matrix list KS_mat
esttab mat(KS_mat, fmt(%9.3f)) using TableA6.tex, replace





** TABLE A7 -- OUTCOME HETEROGENEITY **
* Uses the individual-level data already in memory from the Table A5 section.
* Regresses each index (educ, gender) at t0 and at t1 on E, std7, std8, SC, ST
* and OBC, keeping observations whose missing-flag for that index is 0.
* SEs are clustered by schl_code. Column order: t0 educ, t0 gender, t1 educ, t1 gender.

* The first outreg2 call of this table uses replace so that output left over
* from an earlier run is discarded; every later call appends a column.
* (Previously all calls used append, so re-running stacked duplicate columns.)
local a7_mode replace

forvalues t=0(1)1 {
	foreach ind in educ gender {
		reg t`t'_index_`ind' E std7 std8 SC ST OBC if t`t'_index_`ind'_mis==0, cluster(schl_code)
		outreg2 using TableA7.tex, stats(coef se) `a7_mode' bdec(3) sdec(3)
		local a7_mode append
	}
}


** TABLE A8 -- NETWORK VARIABLE DESCRIPTIVES (ALTERNATIVE DEFINITION)
* Same layout as Table 2, restricted to five link items:
*   col 1 = mean of the t1_out_ version of the item
*   col 2 = correlation between the t1_out_ and t1_in_ versions of the item
*   col 3 = the item's entry in the first column of e(L) from pca on these five items
use "Linkwise_Analysis_Sample_Jun2021", clear

local t1_pca_list_alt q11_frnd q13_speak q14_spemt_out_sch q18_wish_like q19_trust

* Prefix every item with t1_out_ to obtain the varlist passed to pca.
local t1_pca_RH_alt
foreach item of local t1_pca_list_alt {
	local t1_pca_RH_alt `t1_pca_RH_alt' t1_out_`item'
}

matrix mat_links_desc_EL_alt = J(5, 3, .)
pca `t1_pca_RH_alt', components(1)
matrix t1_firstpc = e(L)

local row = 0
foreach item of local t1_pca_list_alt {
	local ++row
	summarize t1_out_`item'
	matrix mat_links_desc_EL_alt[`row', 1] = r(mean)
	correlate t1_out_`item' t1_in_`item'
	matrix c = r(C)
	matrix mat_links_desc_EL_alt[`row', 2] = c[2,1]
	matrix mat_links_desc_EL_alt[`row', 3] = t1_firstpc[`row', 1]
}

matrix list mat_links_desc_EL_alt
esttab mat(mat_links_desc_EL_alt, fmt(%9.3f)) using TableA8.tex, replace






** TABLE A9 -- HOMOPHILY BY POPULATION GROUP
* For each endline link definition (frnd_OUT, then connect_OUT) and each value
* 1-4 of the self Caste code, t1_mat_Caste gets a pair of rows:
*   cols 1-4 : mean link rate towards alters with Caste code 1-4 (estimate row)
*              and its clustered standard error (row below)
*   col  5   : p-value on SameCaste from regressing the link on an indicator
*              that the alter has the same Caste code (estimate row only)

* Build the _alter and _self copies of the individual-level file. Caste starts
* as SC, then is set to 2 for ST, 3 for OBC and 4 when SC, ST and OBC are all 0.
foreach role in alter self {
	use "IDs_and_levels", clear
	gen Gen = 1 if SC==0 & ST==0 & OBC==0
	gen Caste = SC
	replace Caste = 2 if ST==1
	replace Caste = 3 if OBC==1
	replace Caste = 4 if Gen==1
	foreach v of varlist enroll_num E std7 std8 SC ST OBC P t0_index* t1_index* source_* Caste {
		rename `v' `v'_`role'
	}
	tempfile Levels2_`role'
	save `Levels2_`role'', replace
}


use "Linkwise_Analysis_Sample_Jun2021", clear
merge m:1 schl_code enroll_num_self using `Levels2_self'
drop _m
merge m:1 schl_code enroll_num_alter using `Levels2_alter'
drop _m

matrix t1_mat_Caste = J(16, 5, .)
local blk = 0
foreach def in frnd_OUT connect_OUT {
	forvalues own = 1/4 {
		local est_row = 2*`blk' + 1
		local se_row = 2*`blk' + 2

		* Indicator for the alter sharing this Caste code; 0 otherwise, including a missing alter code.
		gen SameCaste = (Caste_alter == `own')
		regress t1_`def' SameCaste if Caste_self == `own', cluster(schl_code)
		test SameCaste
		matrix t1_mat_Caste[`est_row', 5] = r(p)
		drop SameCaste

		forvalues other = 1/4 {
			mean t1_`def' if Caste_self == `own' & Caste_alter == `other', cluster(schl_code)
			matrix b = e(b)
			matrix s = e(V)
			matrix t1_mat_Caste[`est_row', `other'] = b[1,1]
			matrix t1_mat_Caste[`se_row', `other'] = sqrt(s[1,1])
		}

		local ++blk
	}
}
matrix list t1_mat_Caste
esttab mat(t1_mat_Caste, fmt(%9.3f)) using TableA9.tex, replace




** TABLE A10 -- HOMOPHILY BY POPULATION GROUP (ALTERNATIVE DEF)
* Uses the merged link-level data already in memory from the Table A9 section.
* Same layout as Table A9, for the single link variable t1_connect_OUT_alt:
* one pair of rows (estimate, standard error) per self Caste code 1-4,
* cols 1-4 by alter Caste code, col 5 the p-value on SameCaste.
matrix t1_mat_Caste_alt = J(8, 5, .)
local def connect_OUT_alt
local blk = 0
forvalues own = 1/4 {
	local est_row = 2*`blk' + 1
	local se_row = 2*`blk' + 2

	* Indicator for the alter sharing this Caste code; 0 otherwise, including a missing alter code.
	gen SameCaste = (Caste_alter == `own')
	regress t1_`def' SameCaste if Caste_self == `own', cluster(schl_code)
	test SameCaste
	matrix t1_mat_Caste_alt[`est_row', 5] = r(p)
	drop SameCaste

	forvalues other = 1/4 {
		mean t1_`def' if Caste_self == `own' & Caste_alter == `other', cluster(schl_code)
		matrix b = e(b)
		matrix s = e(V)
		matrix t1_mat_Caste_alt[`est_row', `other'] = b[1,1]
		matrix t1_mat_Caste_alt[`se_row', `other'] = sqrt(s[1,1])
	}

	local ++blk
}
matrix list t1_mat_Caste_alt
esttab mat(t1_mat_Caste_alt, fmt(%9.3f)) using TableA10.tex, replace







** TABLE A11 -- DEFINE LMHhat **
* For each outcome index (educ, gender): regress the endline index on E, std7,
* std8, SC, ST, OBC and the baseline index, using observations with T1==0 and
* T2==0 whose endline missing-flag is 0. SEs are clustered by schl_code.
use "Indiv_AnalysisSample_imp_Jun2021", clear

* The first outreg2 call of this table uses replace so that output left over
* from an earlier run is discarded; every later call appends a column.
* (Previously all calls used append, so re-running stacked duplicate columns.)
local a11_mode replace

foreach ind in educ gender {
	* Copy the baseline index into t0_var so the control has the same name for both outcomes.
	gen t0_var = t0_index_`ind'
	reg t1_index_`ind' E std7 std8 SC ST OBC t0_var if T1==0 & T2==0 & t1_index_`ind'_mis==0, cluster(schl_code)
	outreg2 using TableA11.tex, stats(coef se) `a11_mode' bdec(3) sdec(3)
	local a11_mode append
	drop t0_var
}






** TABLE A12 -- TE Heterogeneity on Hhat, Mhat, Lhat
* For each outcome index (educ, gender): regress the endline index, without a
* constant, on Lhat, Mhat, Hhat and their products with P and with NP; a second
* column adds the products with the baseline index. The sample is T1==0 with the
* endline missing-flag equal to 0; SEs are clustered by schl_code.
* Test1 is equality of the three P interactions, Test2 of the three NP interactions.
use "Indiv_AnalysisSample_imp_Jun2021", clear

* NP equals 1 when T2==1 and P==0, and 0 in every other case.
gen NP = 1 if T2==1 & P==0
replace NP = 0 if NP==.

* The first outreg2 call of this table uses replace so that output left over
* from an earlier run is discarded; every later call appends a column.
* (Previously all calls used append, so re-running stacked duplicate columns.)
local a12_mode replace

foreach ind in educ gender {
	* Mhat is defined as the remainder so that Lhat + Mhat + Hhat = 1.
	gen Mhat_`ind' = 1-Lhat_`ind'-Hhat_`ind'
	gen t0_var = t0_index_`ind'
	* Generic copies ending in var, so the regressor names are the same for both outcomes.
	foreach j in L M H {
		gen `j'hat_var = `j'hat_`ind'
		gen P_`j'hat_var = P*`j'hat_`ind'
		gen NP_`j'hat_var = NP*`j'hat_`ind'
		gen t0_var_`j'hat_var = t0_var*`j'hat_var
	}
	
	local LMHhat = "Lhat_var Mhat_var Hhat_var"
	local P_LMHhat = "P_Lhat_var P_Mhat_var P_Hhat_var"
	local NP_LMHhat = "NP_Lhat_var NP_Mhat_var NP_Hhat_var"
	local t0_var_LMHhat = "t0_var_Lhat_var t0_var_Mhat_var t0_var_Hhat_var"
	
	reg t1_index_`ind' `LMHhat' `P_LMHhat' `NP_LMHhat' if T1==0& t1_index_`ind'_mis==0, nocons cluster(schl_code)
	test (P_Lhat_var = P_Mhat_var) (P_Mhat_var = P_Hhat_var) (P_Hhat_var = P_Lhat_var)
	local t1 = r(p)
	test (NP_Lhat_var = NP_Mhat_var) (NP_Mhat_var = NP_Hhat_var) (NP_Hhat_var = NP_Lhat_var)
	local t2 = r(p)
	outreg2 using TableA12.tex, stats(coef se) bdec(3) sdec(3) adec(3) `a12_mode' addstat(Test1 P-value, `t1', Test2 P-value, `t2')
	local a12_mode append
	

	reg t1_index_`ind' `LMHhat' `P_LMHhat' `NP_LMHhat' `t0_var_LMHhat' if T1==0& t1_index_`ind'_mis==0, nocons cluster(schl_code)
	test (P_Lhat_var = P_Mhat_var) (P_Mhat_var = P_Hhat_var) (P_Hhat_var = P_Lhat_var)
	local t1 = r(p)
	test (NP_Lhat_var = NP_Mhat_var) (NP_Mhat_var = NP_Hhat_var) (NP_Hhat_var = NP_Lhat_var)
	local t2 = r(p)
	outreg2 using TableA12.tex, stats(coef se) bdec(3) sdec(3) adec(3) append addstat(Test1 P-value, `t1', Test2 P-value, `t2')
	
	* Drops every variable whose name ends in var, which covers all the generic copies above
	* including t0_var. NOTE(review): assumes the dataset has no other variable ending in var.
	drop *var
}

